# Load the cleaned dataset from disk and preview its first six rows.
life <- read.csv(file = "../Data/clean/life_clean.csv")
head(life, n = 6L)
## X age gender weight_kg height_m max_bpm avg_bpm resting_bpm
## 1 0 34.91 0 65.27 1.62 188.58 157.65 69.05
## 2 1 23.37 1 56.41 1.55 179.43 131.75 73.18
## 3 2 33.20 1 58.98 1.67 175.04 123.95 54.96
## 4 3 38.69 1 93.78 1.70 191.21 155.10 50.07
## 5 4 45.09 0 52.42 1.88 193.58 152.88 70.84
## 6 5 53.19 1 105.05 1.84 176.52 130.60 61.84
## session_duration_hours calories_burned workout_type fat_percentage
## 1 1.00 1080.90 0 26.80038
## 2 1.37 1809.91 1 27.65502
## 3 0.91 802.26 2 24.32082
## 4 1.10 1450.79 1 32.81357
## 5 1.08 1166.40 0 17.30732
## 6 0.69 453.33 3 32.04906
## water_intake_liters workout_frequency_days_week experience_level bmi
## 1 1.50 3.99 2.01 24.87
## 2 1.90 4.00 2.01 23.48
## 3 1.88 2.99 1.02 21.15
## 4 2.50 3.99 1.99 32.45
## 5 2.91 4.00 2.00 14.83
## 6 2.91 3.02 1.00 31.03
## daily_meals_frequency physical_exercise carbs proteins fats calories
## 1 2.99 0.01 267.68 106.05 71.63 1806
## 2 3.01 0.97 214.32 85.41 56.97 1577
## 3 1.99 -0.02 246.04 98.11 65.48 1608
## 4 3.00 0.04 203.22 80.84 54.56 2657
## 5 3.00 3.00 332.79 133.05 88.43 1470
## 6 2.99 -0.04 170.86 67.92 46.06 2767
## meal_type diet_type sugar_g sodium_mg cholesterol_mg serving_size_g
## 1 0 0 31.77 1729.94 285.05 120.47
## 2 0 1 12.34 693.08 300.61 109.15
## 3 1 2 42.81 2142.48 215.42 399.43
## 4 0 2 9.34 123.20 9.70 314.31
## 5 1 0 23.78 1935.11 116.89 99.22
## 6 2 3 15.89 2382.39 36.38 416.54
## cooking_method prep_time_min cook_time_min rating name_of_exercise sets reps
## 1 0 16.24 110.79 1.31 0 4.99 20.91
## 2 1 16.47 12.01 1.92 1 4.01 16.15
## 3 2 54.35 6.09 4.70 2 5.00 21.90
## 4 1 27.73 103.72 4.85 3 4.01 16.92
## 5 3 34.16 46.55 3.07 4 4.99 15.01
## 6 4 20.98 54.64 3.38 5 4.00 25.10
## benefit burns_calories_per_30min target_muscle_group equipment_needed
## 1 0 342.58 0 0
## 2 1 357.16 1 1
## 3 2 359.63 2 1
## 4 3 351.65 3 2
## 5 4 329.36 4 3
## 6 5 374.56 5 4
## difficulty_level body_part type_of_muscle workout bmi_calc cal_from_macros
## 1 0 0 0 0 24.87045 2139.59
## 2 1 1 0 1 23.47971 1711.65
## 3 1 2 1 2 21.14812 1965.92
## 4 0 3 2 3 32.44983 1627.28
## 5 0 4 3 4 14.83137 2659.23
## 6 2 2 3 5 31.02847 1369.66
## pct_carbs protein_per_kg pct_hrr pct_maxhr cal_balance lean_mass_kg
## 1 0.5004323 1.6247893 0.7412365 0.8359847 725.10 47.77739
## 2 0.5008501 1.5140932 0.5512471 0.7342696 -232.91 40.80980
## 3 0.5006104 1.6634452 0.5745336 0.7081239 805.74 44.63558
## 4 0.4995330 0.8620175 0.7441547 0.8111500 1206.21 63.00743
## 5 0.5005810 2.5381534 0.6684048 0.7897510 303.60 43.34750
## 6 0.4989851 0.6465493 0.5995814 0.7398595 2313.67 71.38246
## expected_burn burns_calories_.per_30_min._bc burns_calories_bin
## 1 685.1600 7.260425e+19 0
## 2 978.6184 1.020506e+20 1
## 3 654.5266 1.079607e+20 1
## 4 773.6300 8.987921e+19 1
## 5 711.4176 5.264685e+19 2
## 6 516.8928 1.505159e+20 3
# List every column name of the loaded data frame.
names(life)
## [1] "X" "age"
## [3] "gender" "weight_kg"
## [5] "height_m" "max_bpm"
## [7] "avg_bpm" "resting_bpm"
## [9] "session_duration_hours" "calories_burned"
## [11] "workout_type" "fat_percentage"
## [13] "water_intake_liters" "workout_frequency_days_week"
## [15] "experience_level" "bmi"
## [17] "daily_meals_frequency" "physical_exercise"
## [19] "carbs" "proteins"
## [21] "fats" "calories"
## [23] "meal_type" "diet_type"
## [25] "sugar_g" "sodium_mg"
## [27] "cholesterol_mg" "serving_size_g"
## [29] "cooking_method" "prep_time_min"
## [31] "cook_time_min" "rating"
## [33] "name_of_exercise" "sets"
## [35] "reps" "benefit"
## [37] "burns_calories_per_30min" "target_muscle_group"
## [39] "equipment_needed" "difficulty_level"
## [41] "body_part" "type_of_muscle"
## [43] "workout" "bmi_calc"
## [45] "cal_from_macros" "pct_carbs"
## [47] "protein_per_kg" "pct_hrr"
## [49] "pct_maxhr" "cal_balance"
## [51] "lean_mass_kg" "expected_burn"
## [53] "burns_calories_.per_30_min._bc" "burns_calories_bin"
# Drop the row-index column `X` that read.csv() picked up, then turn any
# character columns into factors and inspect the resulting column types.
# NOTE(review): the glimpse() output recorded below shows only <int>/<dbl>
# columns, so the character-to-factor step appears to match nothing; the
# integer-coded columns (e.g. workout_type, diet_type) stay numeric. Confirm
# this is intended before interpreting their model coefficients.
life <- dplyr::select(life, -X)
life <- dplyr::mutate(life, across(where(is.character), as.factor))
glimpse(life)
## Rows: 20,000
## Columns: 53
## $ age <dbl> 34.91, 23.37, 33.20, 38.69, 45.09, 53.1…
## $ gender <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, …
## $ weight_kg <dbl> 65.27, 56.41, 58.98, 93.78, 52.42, 105.…
## $ height_m <dbl> 1.62, 1.55, 1.67, 1.70, 1.88, 1.84, 1.7…
## $ max_bpm <dbl> 188.58, 179.43, 175.04, 191.21, 193.58,…
## $ avg_bpm <dbl> 157.65, 131.75, 123.95, 155.10, 152.88,…
## $ resting_bpm <dbl> 69.05, 73.18, 54.96, 50.07, 70.84, 61.8…
## $ session_duration_hours <dbl> 1.00, 1.37, 0.91, 1.10, 1.08, 0.69, 1.6…
## $ calories_burned <dbl> 1080.90, 1809.91, 802.26, 1450.79, 1166…
## $ workout_type <int> 0, 1, 2, 1, 0, 3, 0, 3, 0, 0, 0, 1, 1, …
## $ fat_percentage <dbl> 26.80038, 27.65502, 24.32082, 32.81357,…
## $ water_intake_liters <dbl> 1.50, 1.90, 1.88, 2.50, 2.91, 2.91, 2.7…
## $ workout_frequency_days_week <dbl> 3.99, 4.00, 2.99, 3.99, 4.00, 3.02, 4.9…
## $ experience_level <dbl> 2.01, 2.01, 1.02, 1.99, 2.00, 1.00, 3.0…
## $ bmi <dbl> 24.87, 23.48, 21.15, 32.45, 14.83, 31.0…
## $ daily_meals_frequency <dbl> 2.99, 3.01, 1.99, 3.00, 3.00, 2.99, 2.0…
## $ physical_exercise <dbl> 0.01, 0.97, -0.02, 0.04, 3.00, -0.04, -…
## $ carbs <dbl> 267.68, 214.32, 246.04, 203.22, 332.79,…
## $ proteins <dbl> 106.05, 85.41, 98.11, 80.84, 133.05, 67…
## $ fats <dbl> 71.63, 56.97, 65.48, 54.56, 88.43, 46.0…
## $ calories <int> 1806, 1577, 1608, 2657, 1470, 2767, 186…
## $ meal_type <int> 0, 0, 1, 0, 1, 2, 1, 2, 0, 0, 1, 2, 3, …
## $ diet_type <int> 0, 1, 2, 2, 0, 3, 4, 4, 0, 2, 3, 3, 0, …
## $ sugar_g <dbl> 31.77, 12.34, 42.81, 9.34, 23.78, 15.89…
## $ sodium_mg <dbl> 1729.94, 693.08, 2142.48, 123.20, 1935.…
## $ cholesterol_mg <dbl> 285.05, 300.61, 215.42, 9.70, 116.89, 3…
## $ serving_size_g <dbl> 120.47, 109.15, 399.43, 314.31, 99.22, …
## $ cooking_method <int> 0, 1, 2, 1, 3, 4, 1, 5, 2, 0, 3, 0, 3, …
## $ prep_time_min <dbl> 16.24, 16.47, 54.35, 27.73, 34.16, 20.9…
## $ cook_time_min <dbl> 110.79, 12.01, 6.09, 103.72, 46.55, 54.…
## $ rating <dbl> 1.31, 1.92, 4.70, 4.85, 3.07, 3.38, 3.8…
## $ name_of_exercise <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1…
## $ sets <dbl> 4.99, 4.01, 5.00, 4.01, 4.99, 4.00, 5.0…
## $ reps <dbl> 20.91, 16.15, 21.90, 16.92, 15.01, 25.1…
## $ benefit <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 2, 4, 9, 10,…
## $ burns_calories_per_30min <dbl> 342.58, 357.16, 359.63, 351.65, 329.36,…
## $ target_muscle_group <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 4, 11…
## $ equipment_needed <int> 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 3, 9, 10,…
## $ difficulty_level <int> 0, 1, 1, 0, 0, 2, 0, 1, 2, 1, 1, 0, 0, …
## $ body_part <int> 0, 1, 2, 3, 4, 2, 3, 5, 1, 6, 4, 3, 4, …
## $ type_of_muscle <int> 0, 0, 1, 2, 3, 3, 3, 4, 5, 6, 7, 8, 6, …
## $ workout <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1…
## $ bmi_calc <dbl> 24.87045, 23.47971, 21.14812, 32.44983,…
## $ cal_from_macros <dbl> 2139.59, 1711.65, 1965.92, 1627.28, 265…
## $ pct_carbs <dbl> 0.5004323, 0.5008501, 0.5006104, 0.4995…
## $ protein_per_kg <dbl> 1.6247893, 1.5140932, 1.6634452, 0.8620…
## $ pct_hrr <dbl> 0.7412365, 0.5512471, 0.5745336, 0.7441…
## $ pct_maxhr <dbl> 0.8359847, 0.7342696, 0.7081239, 0.8111…
## $ cal_balance <dbl> 725.10, -232.91, 805.74, 1206.21, 303.6…
## $ lean_mass_kg <dbl> 47.77739, 40.80980, 44.63558, 63.00743,…
## $ expected_burn <dbl> 685.1600, 978.6184, 654.5266, 773.6300,…
## $ burns_calories_.per_30_min._bc <dbl> 7.260425e+19, 1.020506e+20, 1.079607e+2…
## $ burns_calories_bin <int> 0, 1, 1, 1, 2, 3, 2, 3, 1, 2, 1, 3, 1, …
# Predictor sets ----
# Exercise-side predictors of calorie burn.
exer <- c(
  "session_duration_hours", "max_bpm", "avg_bpm", "resting_bpm",
  "workout_frequency_days_week", "experience_level", "sets", "reps",
  "workout_type", "difficulty_level", "workout", "target_muscle_group",
  "body_part", "type_of_muscle"
)

# Diet-side predictors of calorie burn.
# `cal_from_macros` is deliberately NOT included: in the rows printed by
# head(life) it equals 4 * carbs + 4 * proteins + 9 * fats exactly, so keeping
# it next to carbs/proteins/fats makes the design matrix rank-deficient. lm()
# then reports an NA coefficient for it and effects::effect() returns all-NA
# fits for the diet model. lm() was already discarding that column, so the
# fitted values and fit statistics are unchanged by leaving it out.
diet <- c(
  "carbs", "proteins", "fats", "calories", "sugar_g", "sodium_mg",
  "cholesterol_mg", "meal_type", "diet_type", "daily_meals_frequency",
  "serving_size_g", "cooking_method", "prep_time_min", "cook_time_min",
  "pct_carbs", "protein_per_kg", "cal_balance"
)

# Union of both sets, used for the combined model.
combined <- union(exer, diet)

# Sanity check: both results must be empty, i.e. every requested predictor
# exists as a column of `life`.
setdiff(exer, names(life))
## character(0)
setdiff(diet, names(life))
## character(0)
# modeling
#' Build a model formula from a response name and predictor names.
#'
#' @param y Response variable (or expression) as a single character string.
#' @param xs Character vector of predictor names; they are joined with `+`.
#'   An empty vector (or `NULL`) yields the intercept-only formula `y ~ 1`
#'   instead of failing to parse "y ~ ".
#' @return A formula object whose environment is the caller's environment,
#'   so variables not found in `data` are looked up where the formula was
#'   requested rather than inside this helper's temporary frame.
make_formula <- function(y, xs) {
  stopifnot(is.character(y), length(y) == 1L)
  # Capture the caller's frame eagerly; evaluating parent.frame() lazily
  # inside as.formula() would be fragile.
  caller_env <- parent.frame()
  rhs <- if (length(xs) == 0L) "1" else paste(xs, collapse = " + ")
  as.formula(paste(y, "~", rhs), env = caller_env)
}
# Fit one linear model per predictor set; the response is always
# burns_calories_per_30min and all models use the full `life` data.
f_exer <- make_formula(y = "burns_calories_per_30min", xs = exer)
lm_exer <- lm(formula = f_exer, data = life)

f_diet <- make_formula(y = "burns_calories_per_30min", xs = diet)
lm_diet <- lm(formula = f_diet, data = life)

f_combined <- make_formula(y = "burns_calories_per_30min", xs = combined)
lm_combined <- lm(formula = f_combined, data = life)

# One-row goodness-of-fit summary for each model.
broom::glance(lm_exer)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.287 0.286 27.2 574. 0 14 -94402. 188836. 188963.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom::glance(lm_diet)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0258 0.0250 31.7 31.2 2.88e-100 17 -97518. 195074. 1.95e5
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
broom::glance(lm_combined)
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.306 0.305 26.8 284. 0 31 -94123. 188313. 188574.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# combine model stats
# Named list of the fitted models; the names become the `model` labels in the
# comparison tables built from this list.
model_list <- setNames(
  list(lm_exer, lm_diet, lm_combined),
  c("exercise", "diet", "combined")
)
library(broom)
library(dplyr)
library(purrr)
# Stack one glance() row per model, tag each row with its list name, keep the
# headline fit statistics and sort by adjusted R-squared (best first).
model_comp <- imap_dfr(
  model_list,
  function(fit, label) mutate(glance(fit), model = label)
)
model_comp <- dplyr::select(
  model_comp,
  model, r.squared, adj.r.squared,
  sigma,     # residual std error
  statistic, # F-statistic
  p.value,   # global F-test p-value
  AIC, BIC
)
model_comp <- arrange(model_comp, desc(adj.r.squared))
model_comp
## # A tibble: 3 × 8
## model r.squared adj.r.squared sigma statistic p.value AIC BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 combined 0.306 0.305 26.8 284. 0 188313. 188574.
## 2 exercise 0.287 0.286 27.2 574. 0 188836. 188963.
## 3 diet 0.0258 0.0250 31.7 31.2 2.88e-100 195074. 195224.
#' In-sample root mean squared error of a fitted model.
#'
#' @param mod A fitted model object for which `residuals()` is defined.
#' @return A single numeric: the square root of the mean squared residual.
#'   Missing residuals are ignored, so a model fitted with
#'   `na.action = na.exclude` (whose residuals are NA-padded) still yields a
#'   number instead of NA.
rmse <- function(mod) {
  sqrt(mean(residuals(mod)^2, na.rm = TRUE))
}
# In-sample RMSE for every model in `model_list`, one row per model.
rmse_tbl <- tibble(
  model = names(model_list),
  rmse = vapply(model_list, rmse, numeric(1), USE.NAMES = FALSE)
)
rmse_tbl
## # A tibble: 3 × 2
## model rmse
## <chr> <dbl>
## 1 exercise 27.1
## 2 diet 31.7
## 3 combined 26.8
# merging tables
# Attach each model's RMSE to its row of fit statistics (matched on `model`).
model_summary <- left_join(x = model_comp, y = rmse_tbl, by = "model")
model_summary
## # A tibble: 3 × 9
## model r.squared adj.r.squared sigma statistic p.value AIC BIC rmse
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 combined 0.306 0.305 26.8 284. 0 1.88e5 1.89e5 26.8
## 2 exercise 0.287 0.286 27.2 574. 0 1.89e5 1.89e5 27.1
## 3 diet 0.0258 0.0250 31.7 31.2 2.88e-100 1.95e5 1.95e5 31.7
#' Build three residual-diagnostic plots for a fitted model.
#'
#' Uses the `.fitted`, `.resid`, `.hat` and `.cooksd` columns of
#' `augment(model)`.
#'
#' @param model A fitted model accepted by `augment()`.
#' @param title_prefix Text prepended to every plot title.
#' @return A named list of ggplot objects: `residuals_plot`, `qq_plot` and
#'   `leverage_plot`.
plot_diagnostics <- function(model, title_prefix = "") {
  model_aug <- augment(model)
  with_prefix <- function(label) paste0(title_prefix, label)

  # Residuals against fitted values, with a dashed zero reference line.
  resid_vs_fitted <- ggplot(model_aug, aes(.fitted, .resid)) +
    geom_point(alpha = 0.4) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    labs(
      title = with_prefix("Residuals vs Fitted"),
      x = "Fitted Values",
      y = "Residuals"
    ) +
    theme_minimal()

  # Normal QQ plot of the residuals.
  resid_qq <- ggplot(model_aug, aes(sample = .resid)) +
    stat_qq(alpha = 0.4) +
    stat_qq_line() +
    labs(
      title = with_prefix("QQ Plot of Residuals"),
      x = "Theoretical Quantiles",
      y = "Sample Quantiles"
    ) +
    theme_minimal()

  # Hat values against Cook's distance.
  leverage_vs_cooks <- ggplot(model_aug, aes(.hat, .cooksd)) +
    geom_point(alpha = 0.4) +
    labs(
      title = with_prefix("Leverage vs Cook's Distance"),
      x = "Leverage (Hat Values)",
      y = "Cook's Distance"
    ) +
    theme_minimal()

  list(
    residuals_plot = resid_vs_fitted,
    qq_plot = resid_qq,
    leverage_plot = leverage_vs_cooks
  )
}
# exercise diagnostics
# Exercise model: build the diagnostics and show each plot individually.
diag_ex <- plot_diagnostics(model = lm_exer, title_prefix = "Exercise Model: ")
diag_ex[["residuals_plot"]]
diag_ex[["qq_plot"]]
diag_ex[["leverage_plot"]]
# Diet model: print the whole list of diagnostic plots at once.
diag_diet <- plot_diagnostics(model = lm_diet, title_prefix = "Diet Model: ")
diag_diet
## $residuals_plot
##
## $qq_plot
##
## $leverage_plot
# Combined model: print the whole list of diagnostic plots at once.
diag_combined <- plot_diagnostics(model = lm_combined, title_prefix = "Combined Model: ")
diag_combined
## $residuals_plot
##
## $qq_plot
##
## $leverage_plot
## $residuals_plot
##
## $qq_plot
##
## $leverage_plot
# Predicted vs actual calorie burn for each model. augment() returns the model
# data with the fitted values (.fitted) added; the dashed red line is y = x,
# i.e. where perfect predictions would fall.
plot_pred_vs_actual <- function(aug, title) {
  ggplot(aug, aes(x = .fitted, y = burns_calories_per_30min)) +
    geom_point(alpha = 0.4, color = "steelblue") +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
    labs(
      title = title,
      x = "Predicted Calorie Burn",
      y = "Actual Calorie Burn"
    ) +
    theme_minimal()
}

aug_exer <- augment(lm_exer)
plot_pred_vs_actual(aug_exer, "Exercise Model: Predicted vs Actual")

aug_diet <- augment(lm_diet)
plot_pred_vs_actual(aug_diet, "Diet Model: Predicted vs Actual")

aug_combined <- augment(lm_combined)
plot_pred_vs_actual(aug_combined, "Exercise & Diet Model: Predicted vs Actual")
# p-values for predictors separately
# Exercise-model coefficients, smallest p-value first.
arrange(tidy(lm_exer), p.value)
## # A tibble: 15 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 212. 4.48 47.4 0
## 2 sets 22.5 0.340 66.3 0
## 3 reps 2.23 0.0535 41.6 0
## 4 session_duration_hours -7.56 0.865 -8.73 2.75e-18
## 5 avg_bpm -0.107 0.0135 -7.93 2.32e-15
## 6 experience_level 4.25 0.563 7.55 4.51e-14
## 7 resting_bpm 0.167 0.0264 6.30 2.99e-10
## 8 workout_frequency_days_week -0.959 0.385 -2.49 1.28e- 2
## 9 type_of_muscle -0.116 0.0514 -2.26 2.36e- 2
## 10 difficulty_level 0.276 0.235 1.17 2.42e- 1
## 11 body_part 0.102 0.0959 1.06 2.87e- 1
## 12 max_bpm -0.00733 0.0168 -0.437 6.62e- 1
## 13 workout_type 0.0742 0.171 0.434 6.65e- 1
## 14 target_muscle_group 0.00623 0.0184 0.339 7.35e- 1
## 15 workout 0.000891 0.0125 0.0710 9.43e- 1
# Diet-model coefficients, smallest p-value first.
arrange(tidy(lm_diet), p.value)
## # A tibble: 19 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 sugar_g -0.197 0.0156 -12.6 2.43e-36
## 2 prep_time_min -0.116 0.0136 -8.51 1.87e-17
## 3 daily_meals_frequency 2.48 0.357 6.95 3.73e-12
## 4 cholesterol_mg -0.0144 0.00258 -5.57 2.59e- 8
## 5 protein_per_kg -6.50 1.36 -4.77 1.86e- 6
## 6 sodium_mg -0.00121 0.000314 -3.86 1.13e- 4
## 7 serving_size_g -0.00629 0.00195 -3.23 1.24e- 3
## 8 diet_type 0.271 0.132 2.05 4.01e- 2
## 9 (Intercept) 752. 369. 2.04 4.15e- 2
## 10 cook_time_min -0.00958 0.00674 -1.42 1.55e- 1
## 11 pct_carbs -805. 738. -1.09 2.75e- 1
## 12 carbs 0.834 0.789 1.06 2.91e- 1
## 13 fats -1.85 1.85 -0.998 3.18e- 1
## 14 calories 0.00108 0.00112 0.964 3.35e- 1
## 15 proteins -0.762 0.872 -0.873 3.82e- 1
## 16 meal_type -0.0603 0.200 -0.301 7.63e- 1
## 17 cal_balance -0.000133 0.000450 -0.295 7.68e- 1
## 18 cooking_method -0.0249 0.113 -0.220 8.26e- 1
## 19 cal_from_macros NA NA NA NA
# Combined-model coefficients, smallest p-value first.
arrange(tidy(lm_combined), p.value)
## # A tibble: 33 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 sets 22.4 0.337 66.4 0
## 2 reps 2.23 0.0532 41.8 0
## 3 sugar_g -0.162 0.0133 -12.2 4.77e-34
## 4 avg_bpm -0.121 0.0134 -9.03 1.81e-19
## 5 prep_time_min -0.0937 0.0115 -8.14 4.02e-16
## 6 resting_bpm 0.187 0.0262 7.15 9.29e-13
## 7 protein_per_kg -7.31 1.16 -6.32 2.67e-10
## 8 experience_level 3.29 0.571 5.75 8.95e- 9
## 9 session_duration_hours -7.28 1.27 -5.74 9.40e- 9
## 10 daily_meals_frequency 1.40 0.302 4.62 3.89e- 6
## # ℹ 23 more rows
# Raw scatter of session duration against calorie burn with a simple
# least-squares line (y ~ x) overlaid in red.
ggplot(life, aes(x = session_duration_hours, y = burns_calories_per_30min)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", formula = y ~ x, color = "red") +
  labs(
    title = "Linear Trend: Session Duration vs Calorie Burn",
    x = "Session Duration (hours)",
    y = "Calorie Burn per 30min"
  ) +
  theme_minimal()
#' Plot the fitted effect of one predictor from a model.
#'
#' Computes the effect with `effects::effect()` on a grid of 20 levels and
#' draws the fitted line with its lower/upper band.
#'
#' @param model A fitted model accepted by `effects::effect()`.
#' @param var Name of the predictor, as a single character string.
#' @param title_prefix Text prepended to the plot title.
#' @return A ggplot object. A warning is raised when every fitted effect is
#'   NA (as happens for a rank-deficient model), because the plot would
#'   otherwise be silently empty.
plot_effect <- function(model, var, title_prefix = "") {
  stopifnot(is.character(var), length(var) == 1L)
  eff <- effects::effect(var, model, xlevels = 20)
  df_eff <- as.data.frame(eff)
  if (all(is.na(df_eff$fit))) {
    warning(
      "All fitted effects for '", var, "' are NA; the model may be ",
      "rank-deficient (check for NA coefficients).",
      call. = FALSE
    )
  }
  # `.data[[var]]` replaces the deprecated aes_string() for a column that is
  # named by a string.
  ggplot(df_eff, aes(x = .data[[var]], y = fit)) +
    geom_line() +
    geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
    labs(
      title = paste0(title_prefix, "Effect of ", var),
      x = var,
      y = "Predicted Calorie Burn per 30 min"
    ) +
    theme_minimal()
}
# examples:
# Two predictors from the exercise model, then two from the diet model.
plot_effect(model = lm_exer, var = "session_duration_hours", title_prefix = "Exercise Model: ")
plot_effect(model = lm_exer, var = "max_bpm", title_prefix = "Exercise Model: ")
plot_effect(model = lm_diet, var = "carbs", title_prefix = "Diet Model: ")
plot_effect(model = lm_diet, var = "calories", title_prefix = "Diet Model: ")
# Inspect the carbs effect from the diet model by hand: compute it on a grid
# of 20 levels, plot the fit with its band, then summarise the effect table.
eff_carbs <- effects::effect("carbs", lm_diet, xlevels = 20)
eff_carbs_df <- as.data.frame(eff_carbs)
ggplot(eff_carbs_df, aes(x = carbs, y = fit)) +
  geom_line() +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  labs(
    title = "Effect of Carbs on Calorie Burn (Diet Model)",
    x = "Carbs (g)",
    y = "Predicted Calorie Burn per 30 min"
  ) +
  theme_minimal()
summary(eff_carbs_df)
## carbs fit se lower upper
## Min. :140.0 Min. : NA Min. : NA Min. : NA Min. : NA
## 1st Qu.:217.5 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
## Median :300.0 Median : NA Median : NA Median : NA Median : NA
## Mean :300.0 Mean :NaN Mean :NaN Mean :NaN Mean :NaN
## 3rd Qu.:382.5 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
## Max. :460.0 Max. : NA Max. : NA Max. : NA Max. : NA
## NA's :20 NA's :20 NA's :20 NA's :20
# First six rows of the carbs effect table.
head(eff_carbs_df, n = 6L)
## carbs fit se lower upper
## 1 140 NA NA NA NA
## 2 150 NA NA NA NA
## 3 170 NA NA NA NA
## 4 190 NA NA NA NA
## 5 210 NA NA NA NA
## 6 220 NA NA NA NA
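# The all-NA effects recorded above come from a rank-deficient diet fit, not
# merely from carbs being highly collinear: cal_from_macros has an NA
# coefficient in tidy(lm_diet), and in the rows printed by head(life) it equals
# 4 * carbs + 4 * proteins + 9 * fats exactly.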
# splitting the data
# Reproducible 80/20 train/test split, then refit all three models on the
# training portion only.
set.seed(400)
life_split <- initial_split(data = life, prop = 0.8)
life_train <- training(life_split)
life_test <- testing(life_split)

lm_exer_train <- lm(formula = f_exer, data = life_train)
lm_diet_train <- lm(formula = f_diet, data = life_train)
lm_combined_train <- lm(formula = f_combined, data = life_train)
# testing
# Hold-out RMSE: score each training-set model on the test rows, comparing
# observed burns_calories_per_30min with the model's predictions.
test_results <- tibble(
  model = c("exercise", "diet", "combined"),
  rmse = vapply(
    list(lm_exer_train, lm_diet_train, lm_combined_train),
    function(fit) {
      rmse_vec(life_test$burns_calories_per_30min, predict(fit, life_test))
    },
    numeric(1)
  )
)
test_results
## # A tibble: 3 × 2
## model rmse
## <chr> <dbl>
## 1 exercise 26.9
## 2 diet 31.2
## 3 combined 26.5